home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Fritz: All Fritz
/
All Fritz.zip
/
All Fritz
/
FILES
/
BUSINESS
/
STATA3.LZH
/
REGRESS.TUT
< prev
next >
Wrap
Text File
|
1988-08-30
|
15KB
|
423 lines
* Regression tutorial - session settings and opening screen.
* NOTE(review): set output error presumably silences ordinary command output,
* which would explain why later commands carry the noisily prefix - confirm.
set output error
set display page 23
set more 1
#delimit ;
* Title banner - the logo rows only line up when the spacing inside the strings is kept exactly ;
di _n(5) in wh
"  ___  ____  ____  ____  ____ tm" _n
" /__    /   ____/   /   ____/" _n
"___/   /   /___/   /   /___/    Estimating Regression Models" _n
"------------------------------------------------------------" _n(2) ;
* Overview of the commands covered and a notice that the data are being prepared ;
di in gr
"This tutorial provides an overview of the Stata commands for estimating" _n
"multiple-regression models, including two-stage least squares. The com-" _n
"mands we discuss are:" _n(2)
_col(12) in wh "regress test predict stepwise" _n(2)
in gr
"We begin by using some Census data we have stored in the file census.dta." _n
"Right now, this tutorial is secretly using the data, dropping some of the" _n
"variables, and generating a few new ones." _n(2)
"We will show you the result on the next screen:" _n(6) ;
#delimit cr
* Locate the tutorial support files. The path macro starts out empty, which
* means the current directory. Each time running nullfile.tut leaves a nonzero
* return code in _rc the next candidate directory is tried: first the DOS
* install directory, then the Unix one. If all three fail, an error message is
* shown and the tutorial exits.
mac def path
capture run nullfile.tut
if _rc {
mac def path "\stata\"
capture run %path`nullfile.tut
if _rc {
mac def path "/usr/stata/"
capture run %path`nullfile.tut
if _rc {
#delimit ;
di in red
"I cannot find the other tutorial files. I have looked in the current" _n
"directory and in \stata (DOS) or /usr/stata (Unix). Is Stata installed" _n
"correctly?" _n(2)
"In any case, I cannot run the tutorial." ;
#delimit cr
exit
}
}
}
* F5 is set to run the contents tutorial and F6 to rerun this one.
* NOTE(review): these look like function-key macros - confirm for this release.
macro define F5 "do %path`contents.tut;"
macro define F6 "do %path`regress.tut;"
* Load the census data from the located directory and keep only the variables
* the tutorial needs.
use %path`census, clear
keep state medage death pop popurban
* Derived variables: percent urban population and deaths per head of population.
gen pcturban = 100* popurban/pop
gen drate = death/pop
* The raw counts are no longer needed once the rates exist.
drop death popurban
* Squared median age supplies the quadratic term of the model.
gen medagesq=medage^2
label variable medagesq "Median age squared"
label variable drate "Death Rate"
label variable pcturban "Percent urban population"
* Show the prepared data set. From here on statements end with a semicolon.
#delimit ;
* NOTE(review): the set more 0 - more - set more 1 sequence looks like a forced pause between screens - confirm ;
set more 0 ; more ; set more 1 ;
* Each command is echoed in white as if typed and then actually run with noisily ;
di _n(2) in wh ". describe" ;
noisily describe ;
di _n in wh ". summarize" ;
noisily summarize ;
set more 0 ; more ; set more 1 ;
* Screen explaining the syntax of regress - command names are shown in white and prose in green ;
di _n(8) in wh _dup(79) "-" _n in gr
"The " in wh "regress"
in gr " command estimates linear regressions. Its syntax is" _n(2)
in wh _col(16) "regress " in gr " lhsvar rhsvar1 rhsvar2 ..." _n(2) in gr
"where lhsvar is the dependent variable and rhsvar1, rhsvar2, etc., are the" _n
"exogenous or control variables. The right-hand-side variables may include" _n
"polynomial terms for estimating quadratic or higher order equations, logs," _n
"or other functions. Stata's '"
in wh "generate" in gr "' command performs these (and many other)" _n
"transformations." _n ;
* Second half of the same screen - limits on the number of right-hand-side variables ;
di in gr
"On PCs under DOS, up to 38 right-hand-side variables may be specified in a" _n
"regression; under Unix, you may include up to 255, although to specify more" _n
"than 38 you'll have to learn about the '"
in wh "set matsize" in gr "' command. You might type" _n
"'" in wh "help matsize" in gr "' at the conclusion of this tutorial." _n
in wh _dup(79) "-" _n(8) ;
set more 0 ; more ; set more 1 ;
* Present the model to be estimated - the row under the equation holds the coefficient subscripts and must stay aligned beneath the b terms ;
di in wh _dup(79) "-" _n in gr
"We wish to estimate the equation" _n(2)
"          drate = b  + b  medage + b  medagesq + b  pcturban" _n
"                   0    1           2             3" _n(2)
"where drate is the state's death rate, medage is the median age of the pop-" _n
"ulation of the state, medagesq is the median age squared, and pcturban is" _n
"the percentage of the population living in urban areas." _n
in wh _dup(79) "-" _n(13)
". regress drate medage medagesq pcturban" ;
* Pause, then run the regression that was just echoed on screen ;
set more 0 ; more ; set more 1 ;
noisily regress drate medage medagesq pcturban ;
di ; set more 0 ; more ; set more 1 ;
* Explain that regress typed without arguments redisplays the last results ;
di _n(16) in wh _dup(79) "-" _n in gr
"We find the t-statistic on medage is 0.402 and on medagesq is 0.115 -- Wait!"
_n
"You don't remember that? Type "
in wh "regress" in gr " without arguments to see the results"
_n
"again." _n
in wh _dup(79) "-" _n(2)
". regress" ;
set more 0 ; more ; set more 1 ;
* Replay the estimates to demonstrate the point ;
noisily regress ;
set more 0 ; more ; set more 1 ;
* Introduce the test command and its simplest form, a joint test of zero coefficients ;
di _n(2) in wh _dup(79) "-" _n in gr
"In our model, the t-statistic on medage is 0.402 and on medagesq is 0.115," _n
"so neither is individually significant. We might wonder if the two variables"
_n
"are jointly significant." _n(2)
"The "
in wh "test"
in gr " command allows you to test hypotheses about your model. The simplest"
_n
"form of "
in wh "test"
in gr " is the joint test that one or more variables have coefficients" _n
"equal to zero. The syntax is:" _n ;
* Syntax line plus the echoed command for the joint test ;
di _col(16) in wh "test" in gr " varname [varname ...]" _n(2)
in wh _dup(79) "-" _n(4)
". test medage medagesq" ;
* Run the joint test on the two median age terms ;
noisily test medage medagesq ;
set more 0 ; more ; set more 1 ;
* Show that test accepts a general linear restriction written as expression = expression ;
di _n(2) in wh _dup(79) "-" _n
"test" in gr
" can test any linear hypothesis about the coefficients. The syntax for" _n
"these kinds of tests is:" _n(2)
_col(16) in wh "test" in gr " expression " in wh "=" in gr " expression" _n(2)
"For instance, we can test whether the coefficient on medage is equal to twice"
_n
"the coefficient on medagesq by typing:" _n
in wh _dup(79) "-" _n(8)
". test medage=2*medagesq" ;
* Run the restriction that was just echoed ;
noisily test medage=2*medagesq ;
set more 0 ; more ; set more 1 ;
* Restate the previous hypothesis in a deliberately roundabout algebraic form ;
di _n(2) in wh _dup(79) "-" _n
"test" in gr
" has the ability to perform algebra, so you can state hypotheses any way" _n
"you desire. We just tested whether the coefficient on medage is twice the" _n
"coefficient on medagesq by typing '"
in wh "test medage=2*medagesq" in gr "'." _n(2)
"Here's another way of achieving the same result:" _n
in wh _dup(79) "-" _n(10)
". test 2*(medage-medagesq)-(medage-medagesq)/2=(medage-medagesq)/2+medagesq";
* The command below must stay identical to the expression echoed above ;
noisily
test 2*(medage-medagesq)-(medage-medagesq)/2=(medage-medagesq)/2+medagesq ;
set more 0 ; more ; set more 1 ;
* Introduce the accumulate option, which adds a restriction to the test run just before ;
di _n(2) in wh _dup(79) "-" _n
"test" in gr
" can be used to test multiple hypotheses as well as a single hypothesis." _n
"You do this by specifying the '"
in wh "accumulate" in gr "' option. We just tested whether" _n
"the coefficient on medage is twice the coefficient on medagesq. Let's now" _n
"add an additional constraint and test whether the coefficient on pcturban is"
_n
"also equal to the coefficient on medage:" _n
in wh _dup(79) "-" _n(10)
". test pcturban=medage, accumulate" ;
* This relies on the immediately preceding test command - do not reorder ;
noisily test pcturban=medage, accumulate ;
set more 0 ; more ; set more 1 ;
* Show that the last estimated model can still be replayed after the test commands ;
di _n(2) in wh _dup(79) "-" _n in gr
"Even after all this time, Stata will redisplay the last estimated model when"
_n
"we type the estimation command without arguments:" _n
in wh _dup(79) "-" _n(2)
". regress" ;
noisily regress ;
set more 0 ; more ; set more 1 ;
* Display the estimator covariance matrix in correlation form ;
di _n(4) in wh _dup(79) "-" _n in gr
"After estimating a model, you can obtain the covariance matrix of the esti-" _n
"mators, presented as a correlation matrix, by typing '"
in wh "correlate, _coef" in gr "':" _n
in wh _dup(79) "-" _n(11)
". correlate, _coef" _n ;
noisily correlate, _coef ;
set more 0 ; more ; set more 1 ;
* Covariance form of the same request, using abbreviated command and option names ;
* The echoed command line must match the command that is actually run below ;
di _n(4) in wh _dup(79) "-" _n in gr
"You can obtain the covariance matrix of the estimators by typing '"
in wh "correlate," _n
"_coef covariance"
in gr "'. As with all Stata commands, you can abbreviate:" _n
in wh _dup(79) "-" _n(11)
". corr, _c cov" _n ;
noisily corr, _c cov ;
set more 0 ; more ; set more 1 ;
* List the statistics available after a regression and introduce predict ;
di _n(2) in wh _dup(79) "-" _n in gr
"Stata can calculate an abundance of statistics from the regression, including:"
_n(2)
_col(16) "predicted values" _n
_col(16) "residuals" _n
_col(16) "standardized residuals" _n
_col(16) "Studentized residuals" _n
_col(16) "standard error of the predictions" _n
_col(16) "standard error of the residuals" _n
_col(16) "Influence statistics:" _n
_col(24) "Cook's distance" _n
_col(24) "diagonal elements of the projection matrix" _n
_col(24) "DF-Betas" _n(2)
"All of this is done by a single command: "
in wh "predict" in gr ". Its syntax is:" _n(2)
_col(16) in wh "predict"
in gr " new-variable-name [" in wh "," in gr
" name-of-statistic]" _n ;
* Note on the default statistic, followed by the echoed predict command ;
di in gr
"If we do not specify the name of a statistic, "
in wh "predict" in gr " calculates predicted" _n
"values." _n
in wh _dup(79) "-" _n(3)
". predict dhat" ;
* Create dhat without noisily, then summarize it next to the observed death rate ;
predict dhat ;
set more 0 ; more ; set more 1 ;
di in wh _n ". summarize drate dhat" ;
noisily summarize drate dhat ;
* Comment on the predicted values and introduce the cooksd influence statistic ;
di _n(2) in wh _dup(79) "-" _n in gr
"Since we did not specify what statistic we wanted "
in wh "predict" in gr " to calculate, we" _n
"obtained the predicted values. We used "
in wh "predict" in gr " to generate predictions for" _n
"the same data on which we estimated the model, and we see that the mean of" _n
"the predictions is equal to the mean of the predicted variable." _n(2)
in wh "predict" in gr
" can also make out-of-sample predictions, even on different data sets."
_n(2)
"As one more example, let's calculate the values of Cook's Distance, an influ-" _n
"ence measure:" _n
in wh _dup(79) "-" _n(2)
". predict influ, cooksd" ;
* Store the influence measure in influ and show its detailed summary ;
predict influ, cooksd ;
set more 0 ; more ; set more 1 ;
di _n in wh ". summarize influ, detail" ;
noisily summarize influ, detail ;
di _n in wh _dup(79) "-" _n in gr
"Note that one observation is extremely influential. Let's find it." _n
in wh _dup(79) "-" ;
set more 0 ; more ; set more 1 ;
* List the states whose influence value exceeds 1 ;
di _n(2) in wh ". list state if influ>1" ;
noisily list state if influ>1 ;
di _n in wh _dup(79) "-" _n(2) in gr
"Stata's estimation commands work the same way that all of Stata's commands" _n
"work, so you have the full power of Stata's syntax at your disposal." _n(2)
"We just discovered that Utah was a heavily influential state in our regres-" _n
"sion. It is the only state that has a value of Cook's Distance in excess" _n
"of 1. We can now reestimate our equation excluding this observation without"
_n
"creating a new data set. We type:" _n
in wh _dup(79) "-" _n(9)
". regress drate medage medagesq pcturban if influ<1" ;
set more 0 ; more ; set more 1 ;
* Refit on the observations with influ below 1 only ;
noisily regress drate medage medagesq pcturban if influ<1 ;
di _n ; set more 0 ; more ; set more 1 ;
* Remove the two generated variables so later screens start from the original variable list ;
drop dhat influ ;
* Introduce weighted regression and show the pop variable used as the weight ;
di _n(2) in wh _dup(79) "-" _n in gr
"In addition to estimating linear regressions, Stata can estimate weighted" _n
"regressions as well. You do not have to be concerned with the normalization"
_n
"of the weight or any of the other details unless, of course, you want to." _n
"Stata has ways of letting you take control." _n(2)
"We have in our data the variable 'pop' which records the population of each"
_n
"state:" _n
in wh _dup(79) "-" _n(2)
". summarize pop" ;
noisily summarize pop ;
* Echo and then run the regression with the weight given as =pop after the variable list ;
di _n(3) in wh _dup(79) "-" _n in gr
"We can reestimate our model using weighted regression by typing:" _n
in wh _dup(79) "-" _n(2)
". regress drate medage medagesq pcturban =pop" ;
set more 0 ; more ; set more 1 ;
noisily regress drate medage medagesq pcturban =pop ;
set more 0 ; more ; set more 1 ;
* Closing remark for the weighted regression section - no pause follows this screen ;
di _n(2) in wh _dup(79) "-" _n in gr
"In terms of how you interact with Stata, weighted regressions are no different"
_n
"than ordinary regressions. You could now perform hypotheses tests, examine" _n
"the covariance matrix, or calculate predicted values using the same commands"
_n
"we've already illustrated." _n
in wh _dup(79) "-" _n(1) ;
* Introduce stepwise, which is described as sharing the syntax of regress ;
di _n(7) in wh _dup(79) "-" _n in gr
"The "
in wh "stepwise"
in gr " command performs stepwise linear regression; its syntax is the" _n
"same as that for "
in wh "regress" in gr ", and it has all the same capabilities. After esti-"
_n
"mating a model with "
in wh "stepwise" in gr ", you can use "
in wh "correlate" in gr ", " in wh "test" in gr ", and "
in wh "predict" in gr ", just" _n
"as you would after " in wh "regress" in gr "." _n
in wh _dup(79) "-" _n(3)
". stepwise drate medage medagesq pcturban" ;
set more 0 ; more ; set more 1 ;
* Run the stepwise regression with its default settings ;
di ; noisily stepwise drate medage medagesq pcturban ;
di _n ; set more 0 ; more ; set more 1 ;
* Describe the default behavior and the F-statistic thresholds ;
di _n(2) in wh _dup(79) "-" _n in gr
"By default, "
in wh "stepwise"
in gr " performs backward variable elimination, although it" _n
"can also perform forward or forward and backward (stepwise) elimination." _n
"The default F-statistic for variable entry is 0.5 and the default for" _n
"keeping a variable is 0.1. All of this is controllable through options:" _n ;
* Two-column option table - option names start at column 9 and descriptions at column 25 ;
di
_col(9) in wh "backward"
_col(25) in gr "performs backward elimination and is the default" _n
_col(9) in wh "forward"
_col(25) in gr "performs forward selection" _n
_col(9) in wh "stepwise"
_col(25) in gr "performs forward selection and backward elimination" _n(2)
_col(9) in wh "fenter(" in gr "#" in wh ")"
_col(25) in gr "specifies F-value required for variable entry" _n
_col(9) in wh "fstay(" in gr "#" in wh ")"
_col(25) in gr "specifies F-value required to keep a variable" _n(2)
"You can learn more about the "
in wh "stepwise" in gr " command by typing '"
in wh "help stepwise" in gr "'" _n
"at the conclusion of this tutorial." _n
in wh _dup(79) "-" _n(8) ;
set more 0 ; more ; set more 1 ;
* Announce the switch to the housing data set for the two-stage least squares example ;
di _n(6) in wh _dup(79) "-" _n in gr
"As our last example, we will show you Stata's ability to perform instrumental"
_n
"variable estimation or two-stage least squares. We're going to use a "
"different" _n
"data set; on the next screen we'll show it to you:" _n
in wh _dup(79) "-" _n(2)
". use %path`hsng, clear" ;
* Load hsng from the located directory, replacing the census data in memory ;
noisily use %path`hsng, clear ;
di _n in wh ". keep hsngval faminc rent pcturban region" ;
keep hsngval faminc rent pcturban region ;
* Create the indicator variables with stub reg from region - this step is not echoed to the reader ;
tabulate region, gen(reg) nofreq ;
set more 0 ; more ; set more 1 ;
* Describe the housing data ;
di _n(2) in wh _dup(79) "-" _n in gr
"The data records the median housing values and rents across the states based"
_n
"on the 1980 Census." _n
in wh _dup(79) "-" _n(4)
". describe" ;
noisily describe ;
di ; set more 0 ; more ; set more 1 ;
* Present the two-equation model - the rows under each equation hold the coefficient subscripts and must stay aligned beneath the a and b terms ;
di _n(2) in wh _dup(79) "-" _n in gr
"We want to estimate the following model:" _n(2)
"          hsngval = a  + a  faminc + a  reg2 + a  reg3 + a  reg4" _n
"                     0    1           2         3         4" _n(2)
"             rent = b  + b  hsngval + b  pcturban" _n
"                     0    1            2" _n ;
* Explain the instrumental variable syntax - the exogenous variables go in parentheses after the right-hand-side list ;
di in gr
"The "
in wh "regress"
in gr " syntax for instrumental variable and two-stage least squares is:" _n(2)
_col(8) in wh "regress" in gr
" lhsvar rhsvar1 rhsvar2 ... " in wh "(" in gr
"exogvar1 exogvar2 ... " in wh ")" in gr _n(2)
"So, to estimate the rent equation, we type:" _n
in wh _dup(79) "-" _n(9)
". regress rent hsngval pcturban (faminc reg2-reg4 pcturban)" ;
set more 0 ; more ; set more 1 ;
* Estimate the rent equation using the generated reg2 through reg4 indicators in the parenthesized list ;
noisily regress rent hsngval pcturban (faminc reg2-reg4 pcturban) ;
di _n(2) in wh _dup(79) "-" _n in gr
"We could now use any of the Stata commands to perform hypothesis testing or"
_n
"we could calculate predicted values." _n
in wh _dup(79) "-" ;
* Clear the data and value labels so the reader gets back an empty session ;
drop _all ;
label drop _all ;
* Repoint F6 at the next tutorial, anova.tut - the closing screen below advertises it ;
macro define F6 "do %path`anova.tut;" ;
set more 0 ; more ; set more 1 ;
* Closing screen with suggestions on what to do next ;
di _n(4) in white
"Demonstration ends" _n
"------------------" _n ;
di in green
"That concludes our short demonstration, but there's much more. We now return"
_n
"control to you. Some suggestions:" _n ;
di in green
"If you ..." _col(34) "Then we will show you ..." _n
" Press " in white "F5" in green _col(38) "a table of tutorial contents" _n
" Press " in white "F6" in green _col(38) "the next tutorial, "
in white "anova.tut" _n ;
* Hand over to tobuy.tut from the located directory as the last step ;
run %path`tobuy.tut ;